%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pylab
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 20, 16
import warnings
import itertools
warnings.filterwarnings("ignore") # specify to ignore warning messages
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pylab
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
rcParams['figure.figsize'] = 20, 16
import warnings
import itertools
warnings.filterwarnings("ignore") # specify to ignore warning messages
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pylab
import matplotlib.pyplot as plt
from matplotlib.pylab import rcParams
import itertools
# in current versions of ipython, rcParams can be lost between cells if combined with other matplotlib calls
# See https://github.com/ipython/ipython/issues/11098
rcParams['figure.figsize'] = 17.5, 14
df = pd.read_csv("MER_T12_06.csv")
df.head()
| MSN | YYYYMM | Value | Column_Order | Description | Unit | |
|---|---|---|---|---|---|---|
| 0 | CLEIEUS | 197301 | 72.076 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1 | CLEIEUS | 197302 | 64.442 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 2 | CLEIEUS | 197303 | 64.084 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 3 | CLEIEUS | 197304 | 60.842 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 4 | CLEIEUS | 197305 | 61.798 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 5094 entries, 0 to 5093 Data columns (total 6 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 MSN 5094 non-null object 1 YYYYMM 5094 non-null int64 2 Value 5094 non-null object 3 Column_Order 5094 non-null int64 4 Description 5094 non-null object 5 Unit 5094 non-null object dtypes: int64(2), object(4) memory usage: 238.9+ KB
dateparse = lambda x: pd.to_datetime(x, format='%Y%m', errors = 'coerce')
df = pd.read_csv("MER_T12_06.csv", parse_dates=['YYYYMM'], index_col='YYYYMM', date_parser=dateparse)
df.head()
| MSN | Value | Column_Order | Description | Unit | |
|---|---|---|---|---|---|
| YYYYMM | |||||
| 1973-01-01 | CLEIEUS | 72.076 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | CLEIEUS | 64.442 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | CLEIEUS | 64.084 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | CLEIEUS | 60.842 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | CLEIEUS | 61.798 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
df.head(15)
| MSN | Value | Column_Order | Description | Unit | |
|---|---|---|---|---|---|
| YYYYMM | |||||
| 1973-01-01 | CLEIEUS | 72.076 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | CLEIEUS | 64.442 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | CLEIEUS | 64.084 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | CLEIEUS | 60.842 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | CLEIEUS | 61.798 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-06-01 | CLEIEUS | 66.538 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-07-01 | CLEIEUS | 72.626 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-08-01 | CLEIEUS | 75.181 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-09-01 | CLEIEUS | 68.397 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-10-01 | CLEIEUS | 67.668 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-11-01 | CLEIEUS | 67.021 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-12-01 | CLEIEUS | 71.118 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| NaT | CLEIEUS | 811.791 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1974-01-01 | CLEIEUS | 70.55 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1974-02-01 | CLEIEUS | 62.929 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
ts = df[pd.Series(pd.to_datetime(df.index, errors='coerce')).notnull().values]
ts.head(15)
| MSN | Value | Column_Order | Description | Unit | |
|---|---|---|---|---|---|
| YYYYMM | |||||
| 1973-01-01 | CLEIEUS | 72.076 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | CLEIEUS | 64.442 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | CLEIEUS | 64.084 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | CLEIEUS | 60.842 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | CLEIEUS | 61.798 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-06-01 | CLEIEUS | 66.538 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-07-01 | CLEIEUS | 72.626 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-08-01 | CLEIEUS | 75.181 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-09-01 | CLEIEUS | 68.397 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-10-01 | CLEIEUS | 67.668 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-11-01 | CLEIEUS | 67.021 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-12-01 | CLEIEUS | 71.118 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1974-01-01 | CLEIEUS | 70.55 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1974-02-01 | CLEIEUS | 62.929 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1974-03-01 | CLEIEUS | 64.519 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
ts.dtypes
MSN object Value object Column_Order int64 Description object Unit object dtype: object
#ss = ts.copy(deep=True)
ts['Value'] = pd.to_numeric(ts['Value'] , errors='coerce')
ts.head()
| MSN | Value | Column_Order | Description | Unit | |
|---|---|---|---|---|---|
| YYYYMM | |||||
| 1973-01-01 | CLEIEUS | 72.076 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | CLEIEUS | 64.442 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | CLEIEUS | 64.084 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | CLEIEUS | 60.842 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | CLEIEUS | 61.798 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
ts.info()
<class 'pandas.core.frame.DataFrame'> DatetimeIndex: 4707 entries, 1973-01-01 to 2016-07-01 Data columns (total 5 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 MSN 4707 non-null object 1 Value 4323 non-null float64 2 Column_Order 4707 non-null int64 3 Description 4707 non-null object 4 Unit 4707 non-null object dtypes: float64(1), int64(1), object(3) memory usage: 220.6+ KB
Energy_sources = ts.groupby('Description')
Energy_sources.head()
| MSN | Value | Column_Order | Description | Unit | |
|---|---|---|---|---|---|
| YYYYMM | |||||
| 1973-01-01 | CLEIEUS | 72.076 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | CLEIEUS | 64.442 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | CLEIEUS | 64.084 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | CLEIEUS | 60.842 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | CLEIEUS | 61.798 | 1 | Coal Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-01-01 | NNEIEUS | 12.175 | 2 | Natural Gas Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | NNEIEUS | 11.708 | 2 | Natural Gas Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | NNEIEUS | 13.994 | 2 | Natural Gas Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | NNEIEUS | 14.627 | 2 | Natural Gas Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | NNEIEUS | 17.344 | 2 | Natural Gas Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-01-01 | DKEIEUS | 2.375 | 3 | Distillate Fuel, Including Kerosene-Type Jet F... | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | DKEIEUS | 2.061 | 3 | Distillate Fuel, Including Kerosene-Type Jet F... | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | DKEIEUS | 1.171 | 3 | Distillate Fuel, Including Kerosene-Type Jet F... | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | DKEIEUS | 1.022 | 3 | Distillate Fuel, Including Kerosene-Type Jet F... | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | DKEIEUS | 0.949 | 3 | Distillate Fuel, Including Kerosene-Type Jet F... | Million Metric Tons of Carbon Dioxide |
| 1973-01-01 | PCEIEUS | 0.128 | 4 | Petroleum Coke Electric Power Sector CO2 Emiss... | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | PCEIEUS | 0.106 | 4 | Petroleum Coke Electric Power Sector CO2 Emiss... | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | PCEIEUS | 0.083 | 4 | Petroleum Coke Electric Power Sector CO2 Emiss... | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | PCEIEUS | 0.130 | 4 | Petroleum Coke Electric Power Sector CO2 Emiss... | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | PCEIEUS | 0.167 | 4 | Petroleum Coke Electric Power Sector CO2 Emiss... | Million Metric Tons of Carbon Dioxide |
| 1973-01-01 | RFEIEUS | 24.867 | 5 | Residual Fuel Oil Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | RFEIEUS | 20.867 | 5 | Residual Fuel Oil Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | RFEIEUS | 19.780 | 5 | Residual Fuel Oil Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | RFEIEUS | 16.562 | 5 | Residual Fuel Oil Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | RFEIEUS | 17.754 | 5 | Residual Fuel Oil Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-01-01 | PAEIEUS | 27.369 | 6 | Petroleum Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | PAEIEUS | 23.034 | 6 | Petroleum Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | PAEIEUS | 21.034 | 6 | Petroleum Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | PAEIEUS | 17.714 | 6 | Petroleum Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | PAEIEUS | 18.870 | 6 | Petroleum Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-01-01 | GEEIEUS | NaN | 7 | Geothermal Energy Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | GEEIEUS | NaN | 7 | Geothermal Energy Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | GEEIEUS | NaN | 7 | Geothermal Energy Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | GEEIEUS | NaN | 7 | Geothermal Energy Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | GEEIEUS | NaN | 7 | Geothermal Energy Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-01-01 | NWEIEUS | NaN | 8 | Non-Biomass Waste Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | NWEIEUS | NaN | 8 | Non-Biomass Waste Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | NWEIEUS | NaN | 8 | Non-Biomass Waste Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | NWEIEUS | NaN | 8 | Non-Biomass Waste Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | NWEIEUS | NaN | 8 | Non-Biomass Waste Electric Power Sector CO2 Em... | Million Metric Tons of Carbon Dioxide |
| 1973-01-01 | TXEIEUS | 111.621 | 9 | Total Energy Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-02-01 | TXEIEUS | 99.185 | 9 | Total Energy Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-03-01 | TXEIEUS | 99.112 | 9 | Total Energy Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-04-01 | TXEIEUS | 93.183 | 9 | Total Energy Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
| 1973-05-01 | TXEIEUS | 98.012 | 9 | Total Energy Electric Power Sector CO2 Emissions | Million Metric Tons of Carbon Dioxide |
fig, ax = plt.subplots()
for desc, group in Energy_sources:
group.plot(y='Value', label=desc,ax = ax, title='Carbon Emissions per Energy Source', fontsize = 20)
ax.set_xlabel('Time(Monthly)')
ax.set_ylabel('Carbon Emissions in MMT')
ax.xaxis.label.set_size(20)
ax.yaxis.label.set_size(20)
ax.legend(fontsize = 16)
fig, axes = plt.subplots(3,3, figsize = (30, 20))
for (desc, group), ax in zip(Energy_sources, axes.flatten()):
group.plot(y='Value',ax = ax, title=desc, fontsize = 18)
ax.set_xlabel('Time(Monthly)')
ax.set_ylabel('Carbon Emissions in MMT')
ax.xaxis.label.set_size(18)
ax.yaxis.label.set_size(18)
CO2_per_source = ts.groupby('Description')['Value'].sum().sort_values()
# I want to use shorter descriptions for the energy sources
CO2_per_source.index
Index(['Geothermal Energy Electric Power Sector CO2 Emissions',
'Non-Biomass Waste Electric Power Sector CO2 Emissions',
'Petroleum Coke Electric Power Sector CO2 Emissions',
'Distillate Fuel, Including Kerosene-Type Jet Fuel, Oil Electric Power Sector CO2 Emissions',
'Residual Fuel Oil Electric Power Sector CO2 Emissions',
'Petroleum Electric Power Sector CO2 Emissions',
'Natural Gas Electric Power Sector CO2 Emissions',
'Coal Electric Power Sector CO2 Emissions',
'Total Energy Electric Power Sector CO2 Emissions'],
dtype='object', name='Description')
cols = ['Geothermal Energy', 'Non-Biomass Waste', 'Petroleum Coke','Distillate Fuel ',
'Residual Fuel Oil', 'Petroleum', 'Natural Gas', 'Coal', 'Total Emissions']
fig = plt.figure(figsize = (16,9))
x_label = cols
x_tick = np.arange(len(cols))
plt.bar(x_tick, CO2_per_source, align = 'center', alpha = 0.5)
fig.suptitle("CO2 Emissions by Electric Power Sector", fontsize= 25)
plt.xticks(x_tick, x_label, rotation = 70, fontsize = 20)
plt.yticks(fontsize = 20)
plt.xlabel('Carbon Emissions in MMT', fontsize = 20)
plt.show()
Emissions = ts.iloc[:,1:] # Monthly total emissions (mte)
Emissions= Emissions.groupby(['Description', pd.Grouper(freq='M')])['Value'].sum().unstack(level = 0)
mte = Emissions['Natural Gas Electric Power Sector CO2 Emissions'] # monthly total emissions (mte)
mte.head()
YYYYMM 1973-01-31 12.175 1973-02-28 11.708 1973-03-31 13.994 1973-04-30 14.627 1973-05-31 17.344 Freq: M, Name: Natural Gas Electric Power Sector CO2 Emissions, dtype: float64
mte.tail()
YYYYMM 2016-03-31 40.525 2016-04-30 39.763 2016-05-31 44.210 2016-06-30 53.567 2016-07-31 62.881 Freq: M, Name: Natural Gas Electric Power Sector CO2 Emissions, dtype: float64
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller
plt.plot(mte)
[<matplotlib.lines.Line2D at 0x22168d64e20>]
def TestStationaryPlot(ts, plot_label = None):
rol_mean = ts.rolling(window = 12, center = False).mean()
rol_std = ts.rolling(window = 12, center = False).std()
plt.plot(ts, color = 'blue',label = 'Original Data')
plt.plot(rol_mean, color = 'red', label = 'Rolling Mean')
plt.plot(rol_std, color ='black', label = 'Rolling Std')
plt.xticks(fontsize = 25)
plt.yticks(fontsize = 25)
plt.xlabel('Time in Years', fontsize = 25)
plt.ylabel('Total Emissions', fontsize = 25)
plt.legend(loc='best', fontsize = 25)
if plot_label is not None:
plt.title('Rolling Mean & Standard Deviation (' + plot_label + ')', fontsize = 25)
else:
plt.title('Rolling Mean & Standard Deviation', fontsize = 25)
plt.show(block= True)
def TestStationaryAdfuller(ts, cutoff = 0.01):
ts_test = adfuller(ts, autolag = 'AIC')
ts_test_output = pd.Series(ts_test[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
for key,value in ts_test[4].items():
ts_test_output['Critical Value (%s)'%key] = value
print(ts_test_output)
if ts_test[1] <= cutoff:
print("Strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root, hence it is stationary")
else:
print("Weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")
TestStationaryPlot(mte, 'unmodified data')
TestStationaryAdfuller(mte)
Test Statistic 1.831215 p-value 0.998409 #Lags Used 19.000000 Number of Observations Used 503.000000 Critical Value (1%) -3.443418 Critical Value (5%) -2.867303 Critical Value (10%) -2.569840 dtype: float64 Weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary
moving_avg = mte.rolling(12).mean()
plt.plot(mte)
plt.plot(moving_avg, color='red')
plt.xticks(fontsize = 25)
plt.yticks(fontsize = 25)
plt.xlabel('Time (years)', fontsize = 25)
plt.ylabel('CO2 Emission (MMT)', fontsize = 25)
plt.title('CO2 emission from electric power generation', fontsize = 25)
plt.show()
mte_moving_avg_diff = mte - moving_avg
mte_moving_avg_diff.head(13)
YYYYMM 1973-01-31 NaN 1973-02-28 NaN 1973-03-31 NaN 1973-04-30 NaN 1973-05-31 NaN 1973-06-30 NaN 1973-07-31 NaN 1973-08-31 NaN 1973-09-30 NaN 1973-10-31 NaN 1973-11-30 NaN 1973-12-31 -4.705333 1974-01-31 -4.594333 Freq: M, Name: Natural Gas Electric Power Sector CO2 Emissions, dtype: float64
mte_moving_avg_diff.dropna(inplace=True)
TestStationaryPlot(mte_moving_avg_diff, 'moving average')
TestStationaryAdfuller(mte_moving_avg_diff)
Test Statistic -5.138977 p-value 0.000012 #Lags Used 19.000000 Number of Observations Used 492.000000 Critical Value (1%) -3.443711 Critical Value (5%) -2.867432 Critical Value (10%) -2.569908 dtype: float64 Strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root, hence it is stationary
mte_exp_weighted_avg = mte.ewm(halflife=12).mean()
plt.plot(mte)
plt.plot(mte_exp_weighted_avg, color='red')
plt.xticks(fontsize = 25)
plt.yticks(fontsize = 25)
plt.xlabel('Time (years)', fontsize = 25)
plt.ylabel('CO2 Emission (MMT)', fontsize = 25)
plt.title('CO2 emission from electric power generation', fontsize = 25)
plt.show()
mte_ewma_diff = mte - mte_exp_weighted_avg
TestStationaryPlot(mte_ewma_diff, 'exp weighted moving avg')
TestStationaryAdfuller(mte_ewma_diff)
Test Statistic -3.423915 p-value 0.010170 #Lags Used 19.000000 Number of Observations Used 503.000000 Critical Value (1%) -3.443418 Critical Value (5%) -2.867303 Critical Value (10%) -2.569840 dtype: float64 Weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary
mte_first_difference = mte - mte.shift(1)
TestStationaryPlot(mte_first_difference.dropna(inplace=False), 'differencing')
TestStationaryAdfuller(mte_first_difference.dropna(inplace=False))
Test Statistic -5.435116 p-value 0.000003 #Lags Used 18.000000 Number of Observations Used 503.000000 Critical Value (1%) -3.443418 Critical Value (5%) -2.867303 Critical Value (10%) -2.569840 dtype: float64 Strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root, hence it is stationary
mte_seasonal_difference = mte - mte.shift(12)
TestStationaryPlot(mte_seasonal_difference.dropna(inplace=False), 'seasonality difference')
TestStationaryAdfuller(mte_seasonal_difference.dropna(inplace=False))
Test Statistic -4.412396 p-value 0.000282 #Lags Used 13.000000 Number of Observations Used 497.000000 Critical Value (1%) -3.443576 Critical Value (5%) -2.867373 Critical Value (10%) -2.569877 dtype: float64 Strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root, hence it is stationary
mte_seasonal_first_difference = mte_first_difference - mte_first_difference.shift(12)
TestStationaryPlot(mte_seasonal_first_difference.dropna(inplace=False), 'diff of seasonal diff')
TestStationaryAdfuller(mte_seasonal_first_difference.dropna(inplace=False))
Test Statistic -1.009743e+01 p-value 1.081539e-17 #Lags Used 1.200000e+01 Number of Observations Used 4.970000e+02 Critical Value (1%) -3.443576e+00 Critical Value (5%) -2.867373e+00 Critical Value (10%) -2.569877e+00 dtype: float64 Strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root, hence it is stationary
from statsmodels.tsa.seasonal import seasonal_decompose
decomposition = seasonal_decompose(mte)
trend = decomposition.trend
seasonal = decomposition.seasonal
residual = decomposition.resid
plt.subplot(411)
plt.plot(mte, label='Original')
plt.legend(loc='best')
plt.subplot(412)
plt.plot(trend, label='Trend')
plt.legend(loc='best')
plt.subplot(413)
plt.plot(seasonal,label='Seasonality')
plt.legend(loc='best')
plt.subplot(414)
plt.plot(residual, label='Residuals')
plt.legend(loc='best')
plt.tight_layout()
mte_decompose = residual
mte_decompose.dropna(inplace=True)
TestStationaryPlot(mte_decompose, 'decomposing')
TestStationaryAdfuller(mte_decompose)
Test Statistic -8.547084e+00 p-value 9.439345e-14 #Lags Used 1.900000e+01 Number of Observations Used 4.910000e+02 Critical Value (1%) -3.443739e+00 Critical Value (5%) -2.867444e+00 Critical Value (10%) -2.569915e+00 dtype: float64 Strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root, hence it is stationary
fig = plt.figure(figsize=(12,8))
ax1 = fig.add_subplot(211)
fig = sm.graphics.tsa.plot_acf(mte_seasonal_first_difference.iloc[13:], lags=40, ax=ax1)
ax2 = fig.add_subplot(212)
fig = sm.graphics.tsa.plot_pacf(mte_seasonal_first_difference.iloc[13:], lags=40, ax=ax2)
p = d = q = range(0, 2) # Define the p, d and q parameters to take any value between 0 and 2
pdq = list(itertools.product(p, d, q)) # Generate all different combinations of p, q and q triplets
pdq_x_QDQs = [(x[0], x[1], x[2], 12) for x in list(itertools.product(p, d, q))] # Generate all different combinations of seasonal p, q and q triplets
print('Examples of Seasonal ARIMA parameter combinations for Seasonal ARIMA...')
print('SARIMAX: {} x {}'.format(pdq[1], pdq_x_QDQs[1]))
print('SARIMAX: {} x {}'.format(pdq[2], pdq_x_QDQs[2]))
Examples of Seasonal ARIMA parameter combinations for Seasonal ARIMA... SARIMAX: (0, 0, 1) x (0, 0, 1, 12) SARIMAX: (0, 1, 0) x (0, 1, 0, 12)
aic_results = []
for param in pdq:
for seasonal_param in pdq_x_QDQs:
try:
mod = sm.tsa.statespace.SARIMAX(mte,
order=param,
seasonal_order=seasonal_param,
enforce_stationarity=False,
enforce_invertibility=False)
results = mod.fit()
print('ARIMA{}x{} - AIC:{}'.format(param, seasonal_param, results.aic))
if results.mle_retvals is not None and results.mle_retvals['converged'] == False:
print(results.mle_retvals)
aic_results.append(results.aic)
except:
continue
aic_results.sort()
print('Best AIC found: ', aic_results[0])
ARIMA(0, 0, 0)x(0, 0, 0, 12) - AIC:4804.065995091217 ARIMA(0, 0, 0)x(0, 0, 1, 12) - AIC:4135.625648186415 ARIMA(0, 0, 0)x(0, 1, 0, 12) - AIC:2572.1947577443725 ARIMA(0, 0, 0)x(0, 1, 1, 12) - AIC:2504.209502835845 ARIMA(0, 0, 0)x(1, 0, 0, 12) - AIC:2544.146426616797 ARIMA(0, 0, 0)x(1, 0, 1, 12) - AIC:2465.147262997527 ARIMA(0, 0, 0)x(1, 1, 0, 12) - AIC:2511.043139693216 ARIMA(0, 0, 0)x(1, 1, 1, 12) - AIC:2505.840238070917 ARIMA(0, 0, 1)x(0, 0, 0, 12) - AIC:4157.5612255158285 ARIMA(0, 0, 1)x(0, 0, 1, 12) - AIC:3572.105901694469 ARIMA(0, 0, 1)x(0, 1, 0, 12) - AIC:2334.724725408906 ARIMA(0, 0, 1)x(0, 1, 1, 12) - AIC:2245.50738642076 ARIMA(0, 0, 1)x(1, 0, 0, 12) - AIC:2329.0446013807305 ARIMA(0, 0, 1)x(1, 0, 1, 12) - AIC:2218.6800979008594 ARIMA(0, 0, 1)x(1, 1, 0, 12) - AIC:2262.0627979898695 ARIMA(0, 0, 1)x(1, 1, 1, 12) - AIC:2247.532772188899 ARIMA(0, 1, 0)x(0, 0, 0, 12) - AIC:2932.1335743055997 ARIMA(0, 1, 0)x(0, 0, 1, 12) - AIC:2616.012810818768 ARIMA(0, 1, 0)x(0, 1, 0, 12) - AIC:2329.0474119288942 ARIMA(0, 1, 0)x(0, 1, 1, 12) - AIC:2068.3633517950716 ARIMA(0, 1, 0)x(1, 0, 0, 12) - AIC:2295.7488324817023 ARIMA(0, 1, 0)x(1, 0, 1, 12) - AIC:2108.9566563046023 ARIMA(0, 1, 0)x(1, 1, 0, 12) - AIC:2162.6924217723586 ARIMA(0, 1, 0)x(1, 1, 1, 12) - AIC:2074.048153336999 ARIMA(0, 1, 1)x(0, 0, 0, 12) - AIC:2842.7367252748636 ARIMA(0, 1, 1)x(0, 0, 1, 12) - AIC:2581.5410372142014 ARIMA(0, 1, 1)x(0, 1, 0, 12) - AIC:2281.274819451042 ARIMA(0, 1, 1)x(0, 1, 1, 12) - AIC:2040.5852777126893 ARIMA(0, 1, 1)x(1, 0, 0, 12) - AIC:2268.802505329819 ARIMA(0, 1, 1)x(1, 0, 1, 12) - AIC:2080.7520754808124 ARIMA(0, 1, 1)x(1, 1, 0, 12) - AIC:2123.2977562954297 ARIMA(0, 1, 1)x(1, 1, 1, 12) - AIC:2044.9914025255468 ARIMA(1, 0, 0)x(0, 0, 0, 12) - AIC:2937.6528799025123 ARIMA(1, 0, 0)x(0, 0, 1, 12) - AIC:2620.0970602333864 ARIMA(1, 0, 0)x(0, 1, 0, 12) - AIC:2249.177786856718 ARIMA(1, 0, 0)x(0, 1, 1, 12) - AIC:2046.0322860770314 ARIMA(1, 0, 0)x(1, 0, 0, 12) - AIC:2251.161836459947 ARIMA(1, 0, 0)x(1, 0, 1, 12) - AIC:2067.590689465961 ARIMA(1, 0, 0)x(1, 1, 0, 12) - AIC:2107.823048920128 ARIMA(1, 0, 0)x(1, 1, 1, 12) - AIC:2050.9250459305504 ARIMA(1, 0, 1)x(0, 0, 0, 12) - AIC:2846.6852669758855 ARIMA(1, 0, 1)x(0, 0, 1, 12) - AIC:2584.2753823646394 ARIMA(1, 0, 1)x(0, 1, 0, 12) - AIC:2243.7605300915866 ARIMA(1, 0, 1)x(0, 1, 1, 12) - AIC:2031.9016416953855 ARIMA(1, 0, 1)x(1, 0, 0, 12) - AIC:2248.658609811775 ARIMA(1, 0, 1)x(1, 0, 1, 12) - AIC:2060.3626766460907 ARIMA(1, 0, 1)x(1, 1, 0, 12) - AIC:2098.620315637129 ARIMA(1, 0, 1)x(1, 1, 1, 12) - AIC:2036.2991709927785 ARIMA(1, 1, 0)x(0, 0, 0, 12) - AIC:2853.830983764321 ARIMA(1, 1, 0)x(0, 0, 1, 12) - AIC:2593.0675116185103 ARIMA(1, 1, 0)x(0, 1, 0, 12) - AIC:2308.6008467817137 ARIMA(1, 1, 0)x(0, 1, 1, 12) - AIC:2053.7191842052616 ARIMA(1, 1, 0)x(1, 0, 0, 12) - AIC:2280.908596857873 ARIMA(1, 1, 0)x(1, 0, 1, 12) - AIC:2093.6589890439723 ARIMA(1, 1, 0)x(1, 1, 0, 12) - AIC:2134.9031516532555 ARIMA(1, 1, 0)x(1, 1, 1, 12) - AIC:2058.3416689359046 ARIMA(1, 1, 1)x(0, 0, 0, 12) - AIC:2841.5426637573464 ARIMA(1, 1, 1)x(0, 0, 1, 12) - AIC:2582.752067342597 ARIMA(1, 1, 1)x(0, 1, 0, 12) - AIC:2240.141291888659 ARIMA(1, 1, 1)x(0, 1, 1, 12) - AIC:2003.5534515297795 ARIMA(1, 1, 1)x(1, 0, 0, 12) - AIC:2219.728927239087 ARIMA(1, 1, 1)x(1, 0, 1, 12) - AIC:2043.602047311111 ARIMA(1, 1, 1)x(1, 1, 0, 12) - AIC:2096.241267785961 ARIMA(1, 1, 1)x(1, 1, 1, 12) - AIC:2005.5002845408999 Best AIC found: 2003.5534515297795
mod = sm.tsa.statespace.SARIMAX(mte,
order=(1,1,1),
seasonal_order=(0,1,1,12),
enforce_stationarity=False,
enforce_invertibility=False)
results = mod.fit()
print(results.summary()) ## ARIMA(1, 1, 1)x(0, 1, 1, 12) - AIC:2003.5534515297795
SARIMAX Results
===========================================================================================================
Dep. Variable: Natural Gas Electric Power Sector CO2 Emissions No. Observations: 523
Model: SARIMAX(1, 1, 1)x(0, 1, 1, 12) Log Likelihood -997.777
Date: Mon, 19 Sep 2022 AIC 2003.553
Time: 17:42:57 BIC 2020.380
Sample: 01-31-1973 HQIC 2010.158
- 07-31-2016
Covariance Type: opg
==============================================================================
coef std err z P>|z| [0.025 0.975]
------------------------------------------------------------------------------
ar.L1 0.6684 0.040 16.560 0.000 0.589 0.748
ma.L1 -0.9534 0.020 -46.799 0.000 -0.993 -0.914
ma.S.L12 -0.7287 0.027 -27.036 0.000 -0.782 -0.676
sigma2 3.2378 0.133 24.263 0.000 2.976 3.499
===================================================================================
Ljung-Box (L1) (Q): 0.23 Jarque-Bera (JB): 183.59
Prob(Q): 0.63 Prob(JB): 0.00
Heteroskedasticity (H): 7.58 Skew: 0.43
Prob(H) (two-sided): 0.00 Kurtosis: 5.85
===================================================================================
Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).
results.resid.plot()
<AxesSubplot:xlabel='YYYYMM'>
print(results.resid.describe())
count 523.000000 mean 0.144267 std 1.885626 min -6.528426 25% -0.791260 50% 0.126975 75% 1.040650 max 12.175000 dtype: float64
results.resid.plot(kind='kde')
<AxesSubplot:ylabel='Density'>
results.plot_diagnostics(figsize=(15, 12))
plt.show()
pred = results.get_prediction(start = 480, end = 522, dynamic=False)
pred_ci = pred.conf_int()
pred_ci.head()
| lower Natural Gas Electric Power Sector CO2 Emissions | upper Natural Gas Electric Power Sector CO2 Emissions | |
|---|---|---|
| YYYYMM | ||
| 2013-01-31 | 30.203836 | 37.257325 |
| 2013-02-28 | 29.088381 | 36.141870 |
| 2013-03-31 | 28.958986 | 36.012475 |
| 2013-04-30 | 30.708074 | 37.761564 |
| 2013-05-31 | 32.104080 | 39.157570 |
# Using DataFrame.mean() method to get column average
df2 = pred_ci["lower Natural Gas Electric Power Sector CO2 Emissions"].mean()
print(df2)
36.70852497421219
df3 = pred_ci["upper Natural Gas Electric Power Sector CO2 Emissions"].mean()
print(df3)
43.762014782151304
((43.762014782151304-36.70852497421219)/(43.762014782151304+36.70852497421219))
0.0876530694250915
## CO2 emissions Reducing Rate : 8.76530694250915 %
(0.0876530694250915*100)
8.76530694250915
ax = mte['1973':].plot(label='observed')
pred.predicted_mean.plot(ax=ax, label='One-step ahead forecast', alpha=.7)
ax.fill_between(pred_ci.index,
pred_ci.iloc[:, 0],
pred_ci.iloc[:, 1], color='r', alpha=.5)
ax.set_xlabel('Time (years)')
ax.set_ylabel('NG CO2 Emissions')
plt.legend()
plt.show()
mte_forecast = pred.predicted_mean
mte_truth = mte['2013-01-31':]
# Compute the mean square error
mse = ((mte_forecast - mte_truth) ** 2).mean()
print('The Mean Squared Error (MSE) of the forecast is {}'.format(round(mse, 2)))
print('The Root Mean Square Error (RMSE) of the forecast: {:.4f}'
.format(np.sqrt(sum((mte_forecast-mte_truth)**2)/len(mte_forecast))))
The Mean Squared Error (MSE) of the forecast is 4.09 The Root Mean Square Error (RMSE) of the forecast: 2.0236
mte_pred_concat = pd.concat([mte_truth, mte_forecast])
pred_dynamic = results.get_prediction(start=pd.to_datetime('2013-01-31'), dynamic=True, full_results=True)
pred_dynamic_ci = pred_dynamic.conf_int()
ax = mte['1973':].plot(label='observed', figsize=(20, 15))
pred_dynamic.predicted_mean.plot(label='Dynamic Forecast', ax=ax)
ax.fill_between(pred_dynamic_ci.index,
pred_dynamic_ci.iloc[:, 0],
pred_dynamic_ci.iloc[:, 1],
color='r',
alpha=.3)
ax.fill_betweenx(ax.get_ylim(),
pd.to_datetime('2013-01-31'),
mte.index[-1],
alpha=.1, zorder=-1)
ax.set_xlabel('Time (years)')
ax.set_ylabel('CO2 Emissions')
plt.legend()
plt.show()
# Extract the predicted and true values of our time series
mte_forecast = pred_dynamic.predicted_mean
mte_original = mte['2013-01-31':]
# Compute the mean square error
mse = ((mte_forecast - mte_original) ** 2).mean()
print('The Mean Squared Error (MSE) of the forecast is {}'.format(round(mse, 2)))
print('The Root Mean Square Error (RMSE) of the forecast: {:.4f}'
.format(np.sqrt(sum((mte_forecast-mte_original)**2)/len(mte_forecast))))
The Mean Squared Error (MSE) of the forecast is 14.39 The Root Mean Square Error (RMSE) of the forecast: 3.7936
# Get forecast of 10 years or 120 months steps ahead in future
forecast = results.get_forecast(steps= 120)
# Get confidence intervals of forecasts
forecast_ci = forecast.conf_int()
forecast_ci.head()
| lower Natural Gas Electric Power Sector CO2 Emissions | upper Natural Gas Electric Power Sector CO2 Emissions | |
|---|---|---|
| 2016-08-31 | 58.062559 | 65.116049 |
| 2016-09-30 | 47.316615 | 55.987495 |
| 2016-10-31 | 40.736072 | 50.163095 |
| 2016-11-30 | 36.175924 | 46.010288 |
| 2016-12-31 | 38.095112 | 48.172699 |
ax = mte.plot(label='observed', figsize=(20, 15))
forecast.predicted_mean.plot(ax=ax, label='Forecast')
ax.fill_between(forecast_ci.index,
forecast_ci.iloc[:, 0],
forecast_ci.iloc[:, 1], color='g', alpha=.4)
ax.set_xlabel('Time (year)')
ax.set_ylabel('NG CO2 Emission level')
plt.legend()
plt.show()
CO2_per_source = ts.groupby('Description')['Value'].sum().sort_values()
CO2_per_source.index
Index(['Geothermal Energy Electric Power Sector CO2 Emissions',
'Non-Biomass Waste Electric Power Sector CO2 Emissions',
'Petroleum Coke Electric Power Sector CO2 Emissions',
'Distillate Fuel, Including Kerosene-Type Jet Fuel, Oil Electric Power Sector CO2 Emissions',
'Residual Fuel Oil Electric Power Sector CO2 Emissions',
'Petroleum Electric Power Sector CO2 Emissions',
'Natural Gas Electric Power Sector CO2 Emissions',
'Coal Electric Power Sector CO2 Emissions',
'Total Energy Electric Power Sector CO2 Emissions'],
dtype='object', name='Description')
cols = ['Geothermal Energy', 'Non-Biomass Waste', 'Petroleum Coke','Distillate Fuel ',
'Residual Fuel Oil', 'Petroleum', 'Natural Gas', 'Coal', 'Total Emissions']
fig = plt.figure(figsize = (16,9))
x_label = cols
x_tick = np.arange(len(cols))
plt.bar(x_tick, CO2_per_source, align = 'center', alpha = 0.5)
fig.suptitle("CO2 Emissions by Electric Power Sector", fontsize= 25)
plt.xticks(x_tick, x_label, rotation = 70, fontsize = 20)
plt.yticks(fontsize = 20)
plt.xlabel('Carbon Emissions in MMT', fontsize = 20)
plt.show()
#Emissions = ts.iloc[:,1:] # Monthly total emissions (mte)
#Emissions= Emissions.groupby(['Description', pd.TimeGrouper('M')])['Value'].sum().unstack(level = 0)
#mte = Emissions['Natural Gas Electric Power Sector CO2 Emissions'] # monthly total emissions (mte)
#mte.head()
import statsmodels
import statsmodels.api as sm
from statsmodels.tsa.stattools import coint, adfuller
plt.plot(mte)
[<matplotlib.lines.Line2D at 0x22161addfd0>]
def TestStationaryPlot(ts):
rol_mean = ts.rolling(window = 12, center = False).mean()
rol_std = ts.rolling(window = 12, center = False).std()
plt.plot(ts, color = 'blue',label = 'Original Data')
plt.plot(rol_mean, color = 'red', label = 'Rolling Mean')
plt.plot(rol_std, color ='black', label = 'Rolling Std')
plt.xticks(fontsize = 25)
plt.yticks(fontsize = 25)
plt.xlabel('Time in Years', fontsize = 25)
plt.ylabel('Total Emissions', fontsize = 25)
plt.legend(loc='best', fontsize = 25)
plt.title('Rolling Mean & Standard Deviation', fontsize = 25)
plt.show(block= True)
def TestStationaryAdfuller(ts, cutoff = 0.01):
ts_test = adfuller(ts, autolag = 'AIC')
ts_test_output = pd.Series(ts_test[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
for key,value in ts_test[4].items():
ts_test_output['Critical Value (%s)'%key] = value
print(ts_test_output)
if ts_test[1] <= cutoff:
print("Strong evidence against the null hypothesis, reject the null hypothesis. Data has no unit root, hence it is stationary")
else:
print("Weak evidence against null hypothesis, time series has a unit root, indicating it is non-stationary ")
TestStationaryPlot(mte)